plus sc10x

run cellranger with Spp1-EGFP added reference index

load dependancies

three models: EAE, AD, SIMPLE
each has one GFP(Spp1+), MIG(total microglia), CTL(WT)
AD.GFP.2 is from homozygous model while others are all heterozygous, so need to be excluded before final clustering

loading 65k cells, CX3CR1+
demo, cellranger called 39,425 cells
plus, cellranger called 43,176 cells

load 10x data

filt.10x <- Read10X(data.dir = "../output_plus_exogene/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`

check datasets

dim(GEX)
## [1] 32286 43176
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##         AAACCCAAGAAGGATG-1 AAACCCAAGAAGTCCG-1 AAACCCAAGAGAAGGT-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
##         AAACCCAAGCACTCAT-1 AAACCCAAGTATGTAG-1 AAACCCAAGTCATCCA-1
## Xkr4                     .                  .                  .
## Gm1992                   .                  .                  .
## Gm19938                  .                  .                  .
## Gm37381                  .                  .                  .
## Rp1                      .                  .                  .
## Sox17                    .                  .                  .
dim(FB)
## [1]    10 43176
FB[,1:6]
## 10 x 6 sparse Matrix of class "dgCMatrix"
##          AAACCCAAGAAGGATG-1 AAACCCAAGAAGTCCG-1 AAACCCAAGAGAAGGT-1
## EAE.GFP                  15                 19                 14
## EAE.MIG                  24                 11                 13
## EAE.CTL                 303                  6                 13
## AD.GFP.1                  6                  7                229
## AD.GFP.2                  5                  1                  3
## AD.MIG                   11                  7                  7
## AD.CTL                    2                  3                  2
## SIM.GFP                  10                  2                  6
## SIM.MIG                   8                 64                  3
## SIM.CTL                   8                  3                  6
##          AAACCCAAGCACTCAT-1 AAACCCAAGTATGTAG-1 AAACCCAAGTCATCCA-1
## EAE.GFP                   6                197                 25
## EAE.MIG                  15                 15                 31
## EAE.CTL                   4                184                 25
## AD.GFP.1                125                  4                 20
## AD.GFP.2                  2                  5                 12
## AD.MIG                    9                  9                 11
## AD.CTL                    1                  1                  6
## SIM.GFP                   5                 10                  8
## SIM.MIG                   6                 88                 12
## SIM.CTL                   3                  3                138
rowSums(FB)
##  EAE.GFP  EAE.MIG  EAE.CTL AD.GFP.1 AD.GFP.2   AD.MIG   AD.CTL  SIM.GFP 
##  2540456  4085857  1197867  1367832   618839  1171724   371447   832225 
##  SIM.MIG  SIM.CTL 
##   907463   482995
rowSums(FB>0)
##  EAE.GFP  EAE.MIG  EAE.CTL AD.GFP.1 AD.GFP.2   AD.MIG   AD.CTL  SIM.GFP 
##    43132    43138    43045    42173    38869    42474    35633    42657 
##  SIM.MIG  SIM.CTL 
##    41787    40281

FB

# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "Spp1ND")

FB.seur
## An object of class Seurat 
## 10 features across 43176 samples within 1 assay 
## Active assay: RNA (10 features, 0 variable features)
rownames(FB.seur)
##  [1] "EAE.GFP"  "EAE.MIG"  "EAE.CTL"  "AD.GFP.1" "AD.GFP.2" "AD.MIG"  
##  [7] "AD.CTL"   "SIM.GFP"  "SIM.MIG"  "SIM.CTL"

normalization

perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html

# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data  
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features

check demux

first define FB colors based on conditions

scales::show_col(ggsci::pal_igv("default")(49))

color.FB <- ggsci::pal_igv("default")(49)[c(8,33,40,
                                            34,26,1,28,
                                            2,43,18)]

#2,13,33,1,15,
#34,26,28,40,18

level.FB <- c(rownames(FB.seur))

color.FBraw <-  c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 5)

scales::show_col(color.FB, ncol = 5)

par(mfrow=c(2,5))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
plot(sort(t(FB)[,9]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[9])
plot(sort(t(FB)[,10]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[10])

range ‘q’

q0.50 ~ 0.99
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,5))


plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("EAE|AD|SIM",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])

plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])
plot(table.demux[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux)[2+9])
plot(table.demux[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux)[2+10])

q0.9900 ~ 0.9999
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,5))


plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("EAE|AD|SIM",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")

plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
plot.new()

plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])

plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])
plot(table.demux.s[,2+9], type="p", col=color.FB[9], pch=16, ylab=colnames(table.demux.s)[2+9])
plot(table.demux.s[,2+10], type="p", col=color.FB[10], pch=16, ylab=colnames(table.demux.s)[2+10])

demultiplexing

(tags1-6/8/9 set q0.998, tags7/10 q0.99)

# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.994)
## Cutoff for EAE.GFP : 40 reads
## Cutoff for EAE.MIG : 55 reads
## Cutoff for EAE.CTL : 32 reads
## Cutoff for AD.GFP.1 : 19 reads
## Cutoff for AD.GFP.2 : 13 reads
## Cutoff for AD.MIG : 22 reads
## Cutoff for AD.CTL : 14 reads
## Cutoff for SIM.GFP : 26 reads
## Cutoff for SIM.MIG : 23 reads
## Cutoff for SIM.CTL : 22 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    15237     1192    26747
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative  EAE.GFP  EAE.MIG  EAE.CTL AD.GFP.1 AD.GFP.2   AD.MIG 
##    15237     1192     3042     3083     3014     3003     1828     2862 
##   AD.CTL  SIM.GFP  SIM.MIG  SIM.CTL 
##     2596     1579     3013     2727
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for EAE.GFP : 42 reads
## Cutoff for EAE.MIG : 59 reads
## Cutoff for EAE.CTL : 34 reads
## Cutoff for AD.GFP.1 : 20 reads
## Cutoff for AD.GFP.2 : 13 reads
## Cutoff for AD.MIG : 23 reads
## Cutoff for AD.CTL : 15 reads
## Cutoff for SIM.GFP : 27 reads
## Cutoff for SIM.MIG : 25 reads
## Cutoff for SIM.CTL : 24 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    14885     1325    26966
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative  EAE.GFP  EAE.MIG  EAE.CTL AD.GFP.1 AD.GFP.2   AD.MIG 
##    14885     1325     3089     3121     3035     3039     1855     2891 
##   AD.CTL  SIM.GFP  SIM.MIG  SIM.CTL 
##     2618     1596     3023     2699
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.998)
## Cutoff for EAE.GFP : 46 reads
## Cutoff for EAE.MIG : 64 reads
## Cutoff for EAE.CTL : 38 reads
## Cutoff for AD.GFP.1 : 22 reads
## Cutoff for AD.GFP.2 : 15 reads
## Cutoff for AD.MIG : 26 reads
## Cutoff for AD.CTL : 17 reads
## Cutoff for SIM.GFP : 30 reads
## Cutoff for SIM.MIG : 27 reads
## Cutoff for SIM.CTL : 26 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    14340     1542    27294
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative  EAE.GFP  EAE.MIG  EAE.CTL AD.GFP.1 AD.GFP.2   AD.MIG 
##    14340     1542     3166     3196     3062     3099     1874     2935 
##   AD.CTL  SIM.GFP  SIM.MIG  SIM.CTL 
##     2626     1610     3057     2669
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for EAE.GFP : 37 reads
## Cutoff for EAE.MIG : 51 reads
## Cutoff for EAE.CTL : 30 reads
## Cutoff for AD.GFP.1 : 17 reads
## Cutoff for AD.GFP.2 : 12 reads
## Cutoff for AD.MIG : 20 reads
## Cutoff for AD.CTL : 13 reads
## Cutoff for SIM.GFP : 24 reads
## Cutoff for SIM.MIG : 21 reads
## Cutoff for SIM.CTL : 20 reads
table(FB.seur$HTO_classification.global)
## 
##  Doublet Negative  Singlet 
##    15819     1083    26274
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
## 
##  Doublet Negative  EAE.GFP  EAE.MIG  EAE.CTL AD.GFP.1 AD.GFP.2   AD.MIG 
##    15819     1083     2983     3021     2951     2936     1802     2818 
##   AD.CTL  SIM.GFP  SIM.MIG  SIM.CTL 
##     2552     1548     2959     2704
# tags q0.99
#cutoff.FB <- c(37,51,30,17,12,
#               20,13,24,21,20)

# tags1-6/8/9 set q0.996
#cutoff.FB <- c(42,59,34,20,13,
#               23,13,27,25,20)

# tags1-6/8/9 set q0.998
cutoff.FB <- c(46,64,38,22,15,
               26,13,30,27,20)

names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
##  EAE.GFP  EAE.MIG  EAE.CTL AD.GFP.1 AD.GFP.2   AD.MIG   AD.CTL  SIM.GFP 
##       46       64       38       22       15       26       13       30 
##  SIM.MIG  SIM.CTL 
##       27       20
par(mfrow=c(2,5))

#plot(sort(rowSums(t(FB))),
#     xlab="reordered cells", 
#     ylab="UMI counts", log="xy",
#     main="sum")


plot(sort(t(FB)[,1]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])

plot(sort(t(FB)[,6]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
plot(sort(t(FB)[,9]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[9])
abline(h=cutoff.FB[9],col = color.FB[9])
plot(sort(t(FB)[,10]),
     xlab="reordered cells", 
     ylab="UMI counts", log="xy",
     main=rownames(FB)[10])
abline(h=cutoff.FB[10],col = color.FB[10])

custom.Demux <- function(FBobj,FBcutoff){
  # define decision matrix
  dd <- FBobj@assays[['HTO']]@counts
  dd <- apply(dd, 2, function(x){
    x > FBcutoff
  })
  x1 <- colSums(dd)
  x1[x1>1] <- "Doublet"
  x1[x1==0] <- "Negative"
  x1[x1==1] <- "Singlet"

  x2 <- x1
  
  bc.slt  <- names(x1)[x1=="Singlet"]
  
  for(bc in bc.slt){
    x2[bc] <- rownames(dd)[dd[,bc]]
  }
  
  FBdf <- data.frame(HTO_classification.global=factor(x1,
                                                      levels = c("Doublet","Negative","Singlet")),
                     hash.ID=factor(x2,
                                    levels = c("Doublet","Negative",rownames(dd))))
  return(FBdf)
}

df.FB <- custom.Demux(FB.seur,cutoff.FB)
dim(df.FB)
## [1] 43176     2
df.FB[1:10,]
##                    HTO_classification.global  hash.ID
## AAACCCAAGAAGGATG-1                   Singlet  EAE.CTL
## AAACCCAAGAAGTCCG-1                   Singlet  SIM.MIG
## AAACCCAAGAGAAGGT-1                   Singlet AD.GFP.1
## AAACCCAAGCACTCAT-1                   Singlet AD.GFP.1
## AAACCCAAGTATGTAG-1                   Doublet  Doublet
## AAACCCAAGTCATCCA-1                   Singlet  SIM.CTL
## AAACCCAAGTCGGCAA-1                   Doublet  Doublet
## AAACCCAAGTCGGCCT-1                   Singlet  EAE.GFP
## AAACCCAAGTGTTGTC-1                   Doublet  Doublet
## AAACCCACAAACTAAG-1                   Singlet  SIM.MIG
table(df.FB)
##                          hash.ID
## HTO_classification.global Doublet Negative EAE.GFP EAE.MIG EAE.CTL AD.GFP.1
##                  Doublet    14706        0       0       0       0        0
##                  Negative       0     1280       0       0       0        0
##                  Singlet        0        0    3128    3134    3023     3052
##                          hash.ID
## HTO_classification.global AD.GFP.2 AD.MIG AD.CTL SIM.GFP SIM.MIG SIM.CTL
##                  Doublet         0      0      0       0       0       0
##                  Negative        0      0      0       0       0       0
##                  Singlet      1853   2901   2665    1584    3014    2836
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
##                    orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAAGGATG-1     Spp1ND        392           10        392           10
## AAACCCAAGAAGTCCG-1     Spp1ND        123           10        123           10
## AAACCCAAGAGAAGGT-1     Spp1ND        296           10        296           10
## AAACCCAAGCACTCAT-1     Spp1ND        176           10        176           10
##                    HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGAAGGATG-1   EAE.CTL      SIM.CTL   2.267075            EAE.CTL
## AAACCCAAGAAGTCCG-1   SIM.MIG      EAE.GFP   1.596241            SIM.MIG
## AAACCCAAGAGAAGGT-1  AD.GFP.1      SIM.CTL   2.543885           AD.GFP.1
## AAACCCAAGCACTCAT-1  AD.GFP.1       AD.MIG   2.033227           AD.GFP.1
##                    HTO_classification.global  hash.ID
## AAACCCAAGAAGGATG-1                   Singlet  EAE.CTL
## AAACCCAAGAAGTCCG-1                   Singlet  SIM.MIG
## AAACCCAAGAGAAGGT-1                   Singlet AD.GFP.1
## AAACCCAAGCACTCAT-1                   Singlet AD.GFP.1
## if all using same q, then run this chunk instead of several custom ones above    
#
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
#                                            levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
#                          levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,32500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1680,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

RidgePlot

with ridge plots, raw
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))

RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=5,
          cols = color.FB) 

with ridge plots, filtered
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=5,same.y.lims= TRUE,
          cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID") 

tSNE for HTOs

# first, remove negative cells from th object
#FB.seur.subset <- FB.seur

# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"

#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:10, perplexity = 500, check_duplicates = FALSE)


#saveRDS(FB.seur.subset, "FB0829.seur.subset.rds")
FB.seur.subset <- readRDS("FB0829.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order =  rev(rownames(FB.seur)),
        cols = color.FB) + labs(title = "FB Max_ID"), 
DimPlot(FB.seur.subset, order = rev(c("Negative",rownames(FB.seur),"Doublet")), 
        group.by = 'hash.ID', label = F,
        cols = c("lightgrey",color.FB,"#FF6C91")) + 
  labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)

stat

FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
                                            levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))

VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)

table(FB.seur@meta.data$hash.ID)
## 
##  Doublet Negative  EAE.GFP  EAE.MIG  EAE.CTL AD.GFP.1 AD.GFP.2   AD.MIG 
##    14706     1280     3128     3134     3023     3052     1853     2901 
##   AD.CTL  SIM.GFP  SIM.MIG  SIM.CTL 
##     2665     1584     3014     2836
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID)
barplot(sl_stat,ylim = c(0,23800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+1575,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

GEX

mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html

GEX.seur <- CreateSeuratObject(counts = GEX,
                               min.cells = 3,
                               min.features = 200,
                               project = "Spp1ND")
GEX.seur
## An object of class Seurat 
## 17590 features across 43175 samples within 1 assay 
## Active assay: RNA (17590 features, 0 variable features)

filtering

MT genes

grep("^mt-",rownames(GEX),value = T)
##  [1] "mt-Nd1"  "mt-Nd2"  "mt-Co1"  "mt-Co2"  "mt-Atp8" "mt-Atp6" "mt-Co3" 
##  [8] "mt-Nd3"  "mt-Nd4l" "mt-Nd4"  "mt-Nd5"  "mt-Nd6"  "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)

RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)

# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc

add FB info

GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
## 
##  Doublet Negative  EAE.GFP  EAE.MIG  EAE.CTL AD.GFP.1 AD.GFP.2   AD.MIG 
##    14706     1280     3127     3134     3023     3052     1853     2901 
##   AD.CTL  SIM.GFP  SIM.MIG  SIM.CTL 
##     2665     1584     3014     2836
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative")
GEX.seur
## An object of class Seurat 
## 17590 features across 27189 samples within 1 assay 
## Active assay: RNA (17590 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = c("#FF6C91","lightgrey",color.FB),
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

filtering

GEX.seur <- subset(GEX.seur, subset = percent.mt < 10 & nFeature_RNA > 650 & nFeature_RNA < 3000 & nCount_RNA < 10000)
GEX.seur
## An object of class Seurat 
## 17590 features across 24766 samples within 1 assay 
## Active assay: RNA (17590 features, 0 variable features)

filtered obj

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0.1, split.by = "FB.info")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        #ncol = 3, 
        ncol = 2,
        cols = color.FBraw,
        pt.size = 0, split.by = "FB.info")

plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt") 
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") 
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb") 
plota + plotb + plotc

par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID)
barplot(sl_stat,ylim = c(0,18800),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+1375,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,4200),
        col = c("#FF6C91","lightgrey",color.FB),
        main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:12*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID), las=3, cex.axis=0.85)
text(x=1:12*1.2-0.45,y=sl_stat+275,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)

check cellcycle

s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))

GEX.seur <- CellCycleScoring(GEX.seur,
                             s.features = s.genes,
                             g2m.features = g2m.genes)
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info", 
    ncol = 2, pt.size = 0.1)

GEX.seur.cc <- GEX.seur

GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)

GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')

Markers and Clusters

Normalizing

GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 200)
##   [1] "Cxcl10"        "Spp1"          "Ccl4"          "Cxcl13"       
##   [5] "Ccl5"          "Cd74"          "H2-Aa"         "Cxcl9"        
##   [9] "H2-Eb1"        "Ccl3"          "Spp1-EGFP"     "Mmp12"        
##  [13] "Ccl12"         "H2-Ab1"        "Pf4"           "Mrc1"         
##  [17] "Gpnmb"         "S100a6"        "Ccl2"          "Rsad2"        
##  [21] "Fn1"           "Cck"           "Top2a"         "Cst7"         
##  [25] "Ttr"           "Mki67"         "Lgals3"        "Lpl"          
##  [29] "Iigp1"         "Fabp5"         "Il1b"          "Ccl6"         
##  [33] "Lyve1"         "Mgl2"          "Apoc1"         "Ifi27l2a"     
##  [37] "Cd209f"        "Egr1"          "Saa3"          "Gdf15"        
##  [41] "Ccr7"          "S100a4"        "Il1rn"         "Ifitm3"       
##  [45] "Ccl22"         "Cxcl2"         "Wfdc17"        "Ifit3"        
##  [49] "Napsa"         "Apoc4"         "Ptgds"         "Prc1"         
##  [53] "Ube2c"         "Plac8"         "Ly6c2"         "Egr3"         
##  [57] "Ms4a7"         "Ch25h"         "AA467197"      "Cd163"        
##  [61] "Igfbp5"        "F13a1"         "Apoe"          "Stmn1"        
##  [65] "Cd209a"        "Pclaf"         "Ifnb1"         "Ifit2"        
##  [69] "Hbb-bs"        "Lyz2"          "Snca"          "Hist1h1b"     
##  [73] "Serpinb6b"     "Cenpf"         "Ifit1"         "Ccl7"         
##  [77] "Tnf"           "Hba-a2"        "Hba-a1"        "Hbb-bt"       
##  [81] "Igf1"          "Alas2"         "Ccl8"          "Cd5l"         
##  [85] "Gm1673"        "Cd200"         "Ank"           "Ccl17"        
##  [89] "Nusap1"        "Cd209g"        "Ly6k"          "Dqx1"         
##  [93] "Cdkn1a"        "Isg15"         "Clu"           "Ifi203"       
##  [97] "Hmmr"          "Aldh1a3"       "Gm45184"       "Crip1"        
## [101] "Cox6a2"        "Gm26885"       "A330032B11Rik" "Cd209b"       
## [105] "Ms4a4b"        "Clec4d"        "Gm43409"       "Slc7a11"      
## [109] "Wdcp"          "Vim"           "Iglc2"         "Clec10a"      
## [113] "Itgax"         "Ramp3"         "Cd52"          "Birc5"        
## [117] "Clec4e"        "Glipr2"        "Ahnak"         "Csf1"         
## [121] "Cybb"          "Enpp2"         "Ifi205"        "Cenpa"        
## [125] "Cd72"          "Nr4a1"         "Ctla2a"        "Plbd1"        
## [129] "Postn"         "Syngr1"        "Gda"           "Lgals1"       
## [133] "Gm49756"       "Emb"           "Ly6a"          "B230312C02Rik"
## [137] "Atf3"          "Gbp2"          "Rnd3"          "Olfr1369-ps1" 
## [141] "Gnas"          "Cbr2"          "Ctsd"          "Ifitm6"       
## [145] "Il2rb"         "Prdx1"         "Gm26737"       "Nkg7"         
## [149] "Dennd5b"       "Stat4"         "Adgre5"        "C4b"          
## [153] "Kit"           "Bst2"          "Plp1"          "Tnfrsf18"     
## [157] "Tyrobp"        "Serpinb1a"     "Serpina3g"     "Spn"          
## [161] "Trbc2"         "Ramp1"         "Lilrb4a"       "Cd69"         
## [165] "Mif"           "Edn1"          "Ctsb"          "Cd38"         
## [169] "Pbld1"         "Skap1"         "Aplp2"         "Tcap"         
## [173] "S100a11"       "Cd300e"        "Krt80"         "Trbc1"        
## [177] "Mndal"         "Clec4b1"       "Chac1"         "Neil3"        
## [181] "B930036N10Rik" "Ifit3b"        "Il4i1"         "Slfn5"        
## [185] "Folr1"         "Traf1"         "Ccnb2"         "Ifi204"       
## [189] "Ccl9"          "Ifitm2"        "Ccnd2"         "Slco3a1"      
## [193] "Cdca8"         "Pbk"           "Hif1a"         "Ccr1"         
## [197] "Acod1"         "Lgals3bp"      "Gm29773"       "Ifi207"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Crybb1, Ecscr, Hpgd, Lrba, Slc2a5, Camk2n1, Stab1, Lst1, Filip1l, Slc40a1 
##     Fcrls, Il7r, Sox4, Ddit4, Bank1, Tent5a, Serpinf1, Btg2, Fam102b, Klf7 
##     Nav3, Gbp7, Klk8, Upk1b, Cask, Hlf, Fry, Tsix, Nfkbia, Tmem52 
## Negative:  Cst7, Apoe, Ctsd, Fabp5, Ctsb, Tyrobp, Fth1, Lpl, Igf1, Lyz2 
##     Cd63, Ctsz, Cd52, Ank, Mif, Gnas, Ftl1, Csf1, B2m, Npc2 
##     Itgax, Cd9, Ccl3, Hif1a, Cox6a2, Eef1a1, Lgals3bp, Trem2, Aplp2, Syngr1 
## PC_ 2 
## Positive:  Trem2, Ctsd, Ank, Tyrobp, Ccl6, Ramp1, Igf1, Serpine2, Baiap2l2, Itgax 
##     Hexa, Mamdc2, Syngr1, Cd68, Hpgd, Dkk2, Cst7, Cadm1, Cd9, St8sia6 
##     Fabp5, Creg1, Scd2, Aplp2, Lpl, Lgi2, Arap2, Slc12a2, Hif1a, Crip1 
## Negative:  Ifit3, Isg15, Iigp1, Ccl12, Ifit2, Ifitm3, Ifi204, Ifit1, Rsad2, Ifi211 
##     Irf7, Ccl2, Slfn5, Ifi207, Ifi206, Zbp1, Oasl2, Ifit3b, Oasl1, Stat1 
##     Slfn2, Ifi209, Rtp4, Fgl2, Usp18, Ifi213, Fcgr4, Gbp2, Bst2, Cxcl10 
## PC_ 3 
## Positive:  Trem2, Ctsd, Tyrobp, Ifit3, Ifit2, Serpine2, Hexa, Ifit1, Ifit3b, Oasl2 
##     Ramp1, Ctsl, Slfn5, Cst7, Baiap2l2, Rsad2, Rtp4, Isg15, Syngr1, Grn 
##     Ly86, Usp18, Lgi2, B2m, Ifi206, Timp2, Fau, Zfas1, Irf7, Ifi27l2a 
## Negative:  Lgals3, Fn1, Lilrb4a, Vim, Pclaf, Id2, Ccl4, Cd72, Bcl2a1d, Lilr4b 
##     Iqgap1, Emp3, Atf3, Cd36, Nes, Stmn1, Tpm4, Spp1-EGFP, Glipr1, Prc1 
##     Spc24, Lgals1, Fxyd5, Spp1, Cdkn1a, Lpl, Tagln2, Cybb, Mmp12, Actg1 
## PC_ 4 
## Positive:  Stmn1, Fcrls, Fam102b, Pclaf, Serpine2, Slc2a5, Crybb1, Hpgd, Prc1, Spc24 
##     Grn, Ecscr, Lrba, Calr, Pbk, Tk1, Sox4, Cdk6, Esco2, Ifit2 
##     Lockd, Knl1, Dek, Scd2, Fkbp2, Smc2, Ccna2, Psat1, Ezh2, Rsad2 
## Negative:  Apoe, H2-D1, Fth1, Cd74, Fau, H2-K1, H2-Aa, H2-Ab1, Lyz2, H2-Eb1 
##     H2-Q7, B2m, C3, C4b, Eef1a1, Apoc1, Cd52, Ms4a7, Eef1b2, Cybb 
##     Ftl1, Ccl5, Gas5, Cd63, H2-Q6, Arhgap15, Pla2g7, Slamf7, Ifi30, Apoc2 
## PC_ 5 
## Positive:  Pclaf, Ly86, Prc1, Stmn1, Spc24, B2m, Pbk, Tk1, Fau, Esco2 
##     Knl1, Lockd, Racgap1, Ccna2, Mis18bp1, Cd52, H2-D1, Ifi27l2a, Ccnf, Smc2 
##     Kif15, Spc25, Ezh2, Asf1b, H2-Q7, Knstrn, Trim59, Dut, H2-K1, Tspo 
## Negative:  Lilrb4a, Lgals3, Ccl4, Atf3, Rsad2, Ifit1, Ifit3, Ccl3, Ifit3b, Plau 
##     Ms4a7, Slc15a3, Plaur, Ifit2, Slfn5, Ifi207, Rgs1, Fam20c, Fcrls, Lilr4b 
##     Dab2, Iqgap1, Gpnmb, Csf1, Igf1, Rab7b, Ch25h, Stab1, Lpl, Id2
length(VariableFeatures(GEX.seur))
## [1] 1199
head(VariableFeatures(GEX.seur),300)
##   [1] "Cxcl10"    "Spp1"      "Ccl4"      "Cxcl13"    "Ccl5"      "Cd74"     
##   [7] "H2-Aa"     "Cxcl9"     "H2-Eb1"    "Ccl3"      "Spp1-EGFP" "Mmp12"    
##  [13] "Ccl12"     "H2-Ab1"    "Pf4"       "Mrc1"      "Gpnmb"     "S100a6"   
##  [19] "Ccl2"      "Rsad2"     "Fn1"       "Cck"       "Cst7"      "Ttr"      
##  [25] "Lgals3"    "Lpl"       "Iigp1"     "Fabp5"     "Il1b"      "Ccl6"     
##  [31] "Lyve1"     "Mgl2"      "Apoc1"     "Ifi27l2a"  "Cd209f"    "Egr1"     
##  [37] "Saa3"      "Gdf15"     "Ccr7"      "S100a4"    "Il1rn"     "Ifitm3"   
##  [43] "Ccl22"     "Cxcl2"     "Wfdc17"    "Ifit3"     "Napsa"     "Apoc4"    
##  [49] "Ptgds"     "Prc1"      "Plac8"     "Ly6c2"     "Egr3"      "Ms4a7"    
##  [55] "Ch25h"     "Cd163"     "Igfbp5"    "F13a1"     "Apoe"      "Stmn1"    
##  [61] "Cd209a"    "Pclaf"     "Ifnb1"     "Ifit2"     "Lyz2"      "Snca"     
##  [67] "Serpinb6b" "Ifit1"     "Ccl7"      "Tnf"       "Igf1"      "Alas2"    
##  [73] "Ccl8"      "Cd5l"      "Cd200"     "Ank"       "Ccl17"     "Cd209g"   
##  [79] "Ly6k"      "Dqx1"      "Cdkn1a"    "Isg15"     "Clu"       "Ifi203"   
##  [85] "Aldh1a3"   "Crip1"     "Cox6a2"    "Cd209b"    "Ms4a4b"    "Clec4d"   
##  [91] "Slc7a11"   "Wdcp"      "Vim"       "Clec10a"   "Itgax"     "Ramp3"    
##  [97] "Cd52"      "Clec4e"    "Glipr2"    "Ahnak"     "Csf1"      "Cybb"     
## [103] "Enpp2"     "Ifi205"    "Cd72"      "Nr4a1"     "Ctla2a"    "Plbd1"    
## [109] "Postn"     "Syngr1"    "Gda"       "Lgals1"    "Emb"       "Ly6a"     
## [115] "Atf3"      "Gbp2"      "Rnd3"      "Gnas"      "Cbr2"      "Ctsd"     
## [121] "Ifitm6"    "Il2rb"     "Prdx1"     "Nkg7"      "Dennd5b"   "Stat4"    
## [127] "Adgre5"    "C4b"       "Kit"       "Bst2"      "Plp1"      "Tnfrsf18" 
## [133] "Tyrobp"    "Serpinb1a" "Serpina3g" "Spn"       "Ramp1"     "Lilrb4a"  
## [139] "Cd69"      "Mif"       "Edn1"      "Ctsb"      "Cd38"      "Pbld1"    
## [145] "Skap1"     "Aplp2"     "Tcap"      "S100a11"   "Cd300e"    "Krt80"    
## [151] "Mndal"     "Clec4b1"   "Chac1"     "Neil3"     "Ifit3b"    "Il4i1"    
## [157] "Slfn5"     "Folr1"     "Ifi204"    "Ccl9"      "Ifitm2"    "Ccnd2"    
## [163] "Slco3a1"   "Pbk"       "Hif1a"     "Ccr1"      "Acod1"     "Lgals3bp" 
## [169] "Ifi207"    "Cd3d"      "Ifi44"     "Ifi211"    "Flt1"      "Olr1"     
## [175] "Serpine2"  "Cd93"      "Fth1"      "Cd63"      "Il2ra"     "Sprr1a"   
## [181] "Cp"        "Fxyd5"     "Nudt17"    "Trib3"     "Ccnb1"     "Ecrg4"    
## [187] "Knl1"      "Pou3f1"    "Fxyd1"     "Fcgr4"     "Mamdc2"    "Ftl1"     
## [193] "Ly75"      "Lyz1"      "Serpinb8"  "Id2"       "Ctsz"      "Oasl1"    
## [199] "Colec12"   "Clec12a"   "Cxcl14"    "Lockd"     "Ccr2"      "Sirpb1c"  
## [205] "Enah"      "Gadd45b"   "Phlda3"    "Irf7"      "Cd36"      "Hp"       
## [211] "Dkk2"      "Serpinh1"  "Bex1"      "Klrd1"     "Esco2"     "Ms4a4c"   
## [217] "Ifitm1"    "Baiap2l2"  "Cacnb3"    "Ntrk2"     "Klra7"     "Rhox5"    
## [223] "Ctsl"      "Pglyrp1"   "Ctsw"      "Lgi2"      "Nedd4"     "Capg"     
## [229] "Xcl1"      "Oasl2"     "Adgrg6"    "Rgs1"      "Trem2"     "Rab3c"    
## [235] "Tnip3"     "Cstb"      "Timd4"     "Gapdh"     "Ccdc63"    "Gpr141"   
## [241] "Ear2"      "Gria2"     "H2-Q7"     "Aspm"      "Calca"     "Kcnk9"    
## [247] "Rep15"     "Cspg4"     "Scd2"      "Dab2"      "Trpm3"     "Mt1"      
## [253] "Cd9"       "Vcam1"     "Ccna2"     "Bcl2l14"   "Slamf7"    "Gimap3"   
## [259] "Il12b"     "Gdf3"      "Kcnq1ot1"  "Shcbp1"    "Grb14"     "Crlf2"    
## [265] "Pkm"       "Spc24"     "Net1"      "Nes"       "Axl"       "Spag5"    
## [271] "Cfp"       "Igfbp2"    "Adamts1"   "Timp2"     "Pld3"      "Spint2"   
## [277] "Atp6v0d2"  "Gpr84"     "S100a10"   "Loxl2"     "Hbegf"     "Rcan1"    
## [283] "Cd7"       "Tnfrsf11b" "Bcl2a1d"   "Zbp1"      "Olfr889"   "Ptgs2"    
## [289] "Fabp3"     "Tk1"       "Fabp4"     "Tlr2"      "Pcp4"      "Ifi208"   
## [295] "Egfl7"     "Klrk1"     "Tspo"      "Sag"       "Pcgf2"     "Usp18"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:25
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 10)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 1.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 24766
## Number of edges: 285496
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7557
## Number of communities: 27
## Elapsed time: 2 seconds
## 3 singletons identified. 24 final clusters.

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 50, seed.use = 666)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:09:21 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:09:21 Read 24766 rows and found 25 numeric columns
## 17:09:21 Using Annoy for neighbor search, n_neighbors = 50
## 17:09:21 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:09:24 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpS6e24x\filecab8362635c
## 17:09:24 Searching Annoy index using 1 thread, search_k = 5000
## 17:09:37 Annoy recall = 100%
## 17:09:38 Commencing smooth kNN distance calibration using 1 thread
## 17:09:40 Initializing from normalized Laplacian + noise
## 17:09:42 Commencing optimization for 200 epochs, with 1898574 positive edges
## 17:10:15 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T, group.by = "seurat_clusters") + 
  DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters")

FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'tsne', group.by = "FB.info", split.by = "FB.info", 
        ncol = 5, cols = color.FB)

DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info", 
        ncol = 5, cols = color.FB)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "FB.info", cols = color.FB)

check some markers

markers.mig <- c("Ptprc",#"Cd3d","Cd3e","Cd19",
                 "Cd74","Lyz2","Ccl4",
                 "Aif1","P2ry12","C1qa","Spp1",
                 "Top2a","Pcna","Mki67","Mcm6",
                 "Cx3cr1","Il4ra","Il13ra1","Spp1-EGFP",
                 "Fabp5","Hmox1","Ms4a7","Cenpa")
FeaturePlot(GEX.seur, 
            features = markers.mig,
            ncol = 4)

DotPlot(GEX.seur, features = markers.mig, group.by = "FB.info",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)

DotPlot(GEX.seur, features = markers.mig, group.by = "seurat_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$seurat_clusters)[c(3:12),],
                   main = "Cell Count",
                   gaps_row = c(3,7),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$seurat_clusters)[c(3:12),]),
                   main = "Cell Ratio",
                   gaps_row = c(3,7),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

markers

sort_clusters

GEX.seur$sort_clusters <- factor(GEX.seur$seurat_clusters,
                                 levels = c(1,3,0,2,10,9,
                                            19,21,
                                            4,16,13,11,20,
                                            5,12,14,17,
                                            18,6,8,7,15,22,23))
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$FB.info,
      clusters=GEX.seur$sort_clusters)[c(3:12),],
                   main = "Cell Count",
                   gaps_row = c(3,7),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$FB.info,
                                clusters=GEX.seur$sort_clusters)[c(3:12),]),
                   main = "Cell Ratio",
                   gaps_row = c(3,7),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

DotPlot(GEX.seur, features = markers.mig, group.by = "sort_clusters",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))+ scale_y_discrete(limits=rev)

check some markers

check.Cell2017 <- c("Hexb","Cst3","P2ry12","Cx3cr1",
                    "Cd9","Ctsd","Cst7","Lpl",
                    "Mrc1","Cd163","S100a4","Cd74",
                    "Cd79b","Rag1","Trbc2","Nkg7",
                    "S100a9","Camp"
                    )

DotPlot(GEX.seur, features = check.Cell2017, group.by = "sort_clusters",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)
## Warning in FetchData(object = object, vars = features, cells = cells): The
## following requested variables were not found: Rag1, Camp

markers.Sci2019 <- c("Tmem119","Hexb","Slc2a5","P2ry12","Siglech","Trem2","Sparc","Olfml3",  # Microglia
                     "Mrc1","Lyve1","Cd163","Siglec1","Pf4","Ms4a7","Cbr2",  # CAMs
                     "Ly6c2","Ccr2","Plac8","Anxa8","Nr4a1","Mertk","Zbtb26","Cd209a",  # Monocyte-derived
                     "Flt3","Zbtb46","Batf3","Itgae","Clec9a",  # DCs
                     "Tubb3"  # local added
                     )

DotPlot(GEX.seur, features = markers.Sci2019, group.by = "sort_clusters",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)
## Warning in FetchData(object = object, vars = features, cells = cells): The
## following requested variables were not found: Anxa8

c("Xist","Tsix","Uty","Ddx3y","Eif2s3y","Kdm5d") %in% VariableFeatures(GEX.seur)
## [1] FALSE  TRUE FALSE  TRUE  TRUE FALSE
xy.genes <- c("Xist","Tsix","Uty","Ddx3y","Eif2s3y","Kdm5d")

DotPlot(GEX.seur, features = xy.genes, group.by = "sort_clusters",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)

xy.genes <- c("Xist","Tsix","Uty","Ddx3y","Eif2s3y","Kdm5d")

DotPlot(GEX.seur, features = xy.genes, group.by = "FB.info",
                         cols = c("midnightblue","darkorange1")) +
                 theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1))#+ scale_y_discrete(limits=rev)

all markers

# find markers for every cluster compared to all remaining cells, report only the positive ones
Idents(GEX.seur) <- "sort_clusters"

#GEX.markers.pre <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.05,
#                                  test.use = "MAST",
#                                  #test.use = "wilcox",
#                                  logfc.threshold = 0.25)
GEX.markers.pre <- read.table("sc10x_LYN.marker.sort0829.csv", header = TRUE, sep = ",")
GEX.markers.pre %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC)
## # A tibble: 190 x 7
## # Groups:   cluster [24]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
##        <dbl>      <dbl> <dbl> <dbl>     <dbl>   <int> <chr> 
##  1 0              1.14  0.76  0.354 0               1 Crybb1
##  2 0              1.06  0.998 0.916 0               1 P2ry12
##  3 0              0.958 0.723 0.341 0               1 Ecscr 
##  4 0              0.934 0.995 0.847 0               1 Fcrls 
##  5 0              0.932 0.85  0.565 0               1 Cd164 
##  6 0              0.877 0.946 0.754 0               1 Maf   
##  7 3.69e-264      0.912 0.674 0.349 6.50e-260       1 Slc2a5
##  8 2.21e-209      0.884 0.539 0.25  3.88e-205       1 Sox4  
##  9 2.22e-290      0.752 0.997 0.918 3.91e-286       3 P2ry12
## 10 2.98e-212      0.713 0.976 0.853 5.24e-208       3 Fcrls 
## # ... with 180 more rows
GEX.markers.pre$cluster <- factor(as.character(GEX.markers.pre$cluster),
                          levels = levels(GEX.seur$sort_clusters))



markers.pre_t32 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.1) %>%
                   top_n(n = 32, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene


markers.pre_t48 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.05 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
                   top_n(n = 48, wt = avg_log2FC) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene

markers.pre_t60 <- (GEX.markers.pre %>% group_by(cluster) %>% 
                  filter(pct.1>0.01 & gene %in% grep("Rps|Rpl|mt-",GEX.markers.pre$gene,invert = T,value = T)) %>%
                   top_n(n = 60, wt = avg_log2FC) %>%
                    filter(p_val_adj < 0.01) %>%
                   ungroup() %>%
  arrange(desc(avg_log2FC*pct.1),gene) %>%
                             distinct(gene, .keep_all = TRUE) %>%
                             arrange(cluster,p_val_adj))$gene
DotPlot(GEX.seur, features = rev(markers.pre_t60[1:64]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[65:128]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[129:192]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[193:256]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[257:320]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[321:384]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[385:448]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[449:512]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[513:576]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[577:640]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DotPlot(GEX.seur, features = rev(markers.pre_t60[641:689]))  + coord_flip() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6))

DoubletFinder

library(DoubletFinder)
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## Loading required package: ROCR

## NULL
## [1] "Creating 8255 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
## [1] "Creating 8255 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0.05, group.by = "sort_clusters")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0, group.by = "sort_clusters")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0.05, group.by = "sort_clusters", split.by = "DoubletFinder0.05")

VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"), 
        ncol = 2,
        pt.size = 0.05, group.by = "sort_clusters", split.by = "DoubletFinder0.1")

table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.05")])
##              DoubletFinder0.05
## sort_clusters Doublet Singlet
##            1      217    2336
##            3       43    1782
##            0        1    2620
##            2       44    2349
##            10      74    1051
##            9       72    1074
##            19      41     173
##            21       0     204
##            4       76    1481
##            16      29     316
##            13      41     715
##            11     103    1013
##            20      19     194
##            5       15    1505
##            12      61    1045
##            14      38     514
##            17      22     315
##            18       1     244
##            6      102    1385
##            8      131    1290
##            7       65    1407
##            15      32     391
##            22      11      92
##            23       0      32
table(GEX.seur@meta.data[,c("sort_clusters","DoubletFinder0.1")])
##              DoubletFinder0.1
## sort_clusters Doublet Singlet
##            1      282    2271
##            3       74    1751
##            0        8    2613
##            2       75    2318
##            10      99    1026
##            9      127    1019
##            19      64     150
##            21       1     203
##            4      123    1434
##            16      65     280
##            13      97     659
##            11     225     891
##            20     154      59
##            5       27    1493
##            12     102    1004
##            14     115     437
##            17      37     300
##            18       1     244
##            6      200    1287
##            8      314    1107
##            7      163    1309
##            15      68     355
##            22      53      50
##            23       3      29

preAnno

# preAnno
GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)

GEX.seur$preAnno[GEX.seur$preAnno %in% setdiff(levels(GEX.seur$seurat_clusters), c(21,23,22,15))] <- "Microglia"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(21)] <- "MIG.lowUMI"

GEX.seur$preAnno[GEX.seur$preAnno %in% c(23)] <- "DC"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(22)] <- "BAM"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(15)] <- "Mono"

GEX.seur$preAnno <- factor(GEX.seur$preAnno,
                           levels = c("Microglia",
                                      "MIG.lowUMI","DC","BAM","Mono"))

# cnt
# EAE - SIM - AD
# CTL - MIG - GFP
GEX.seur$cnt <- factor(gsub(".1|.2","",as.character(GEX.seur$FB.info)),
                       levels = c("EAE.CTL","EAE.MIG","EAE.GFP",
                                  "SIM.CTL","SIM.MIG","SIM.GFP",
                                  "AD.CTL","AD.MIG","AD.GFP"))

color.cnt <- color.FB[c(3,2,1,
                        10,9,8,
                        7,6,4)]
DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "preAnno") +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.seur$cnt,
      clusters=GEX.seur$preAnno),
                   main = "Cell Count",
                   gaps_row = c(3,6),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.seur$cnt,
                                clusters=GEX.seur$preAnno)),
                   main = "Cell Ratio",
                   gaps_row = c(3,6),
      #gaps_col = c(12,13,14),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 2)

#saveRDS(GEX.seur, "sc10x_LYN.marker.sort0829.rds")